/*
 * kmp_lock.h -- lock header file
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#ifndef KMP_LOCK_H
#define KMP_LOCK_H

#include <limits.h>    // CHAR_BIT
#include <stddef.h>    // offsetof

#include "kmp_os.h"
#include "kmp_debug.h"

#ifdef __cplusplus
extern "C" {
#endif // __cplusplus

// ----------------------------------------------------------------------------
// Have to copy these definitions from kmp.h because kmp.h cannot be included
// due to circular dependencies.  Will undef these at end of file.

#define KMP_PAD(type, sz)     (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1))
#define KMP_GTID_DNE (-2)

// Forward declaration of ident and ident_t

struct ident;
typedef struct ident ident_t;

// End of copied code.
// ----------------------------------------------------------------------------

//
// We need to know the size of the area we can assume that the compiler(s)
// allocated for objects of type omp_lock_t and omp_nest_lock_t.  The Intel
// compiler always allocates a pointer-sized area, as does visual studio.
//
// gcc however, only allocates 4 bytes for regular locks, even on 64-bit
// intel archs.  It allocates at least 8 bytes for nested lock (more on
// recent versions), but we are bounded by the pointer-sized chunks that
// the Intel compiler allocates.
//

#if KMP_OS_LINUX && defined(KMP_GOMP_COMPAT)
# define OMP_LOCK_T_SIZE        sizeof(int)
# define OMP_NEST_LOCK_T_SIZE   sizeof(void *)
#else
# define OMP_LOCK_T_SIZE        sizeof(void *)
# define OMP_NEST_LOCK_T_SIZE   sizeof(void *)
#endif

//
// The Intel compiler allocates a 32-byte chunk for a critical section.
// Both gcc and visual studio only allocate enough space for a pointer.
// Sometimes we know that the space was allocated by the Intel compiler.
//
#define OMP_CRITICAL_SIZE       sizeof(void *)
#define INTEL_CRITICAL_SIZE     32

//
// lock flags
//
typedef kmp_uint32 kmp_lock_flags_t;

#define kmp_lf_critical_section 1

//
// When a lock table is used, the indices are of kmp_lock_index_t
//
typedef kmp_uint32 kmp_lock_index_t;

//
// When memory allocated for a lock is on the lock pool (free list),
// it is treated as structs of this type.
//
struct kmp_lock_pool {
    union kmp_user_lock *next;  // next free lock on the pool's free list
    kmp_lock_index_t index;     // this lock's index when a lock table is in use
};

typedef struct kmp_lock_pool kmp_lock_pool_t;


extern void __kmp_validate_locks( void );


// ----------------------------------------------------------------------------
//
//  There are 5 lock implementations:
//
//       1. Test and set locks.
//       2. futex locks (Linux* OS on x86 and Intel(R) Many Integrated Core architecture)
//       3. Ticket (Lamport bakery) locks.
//       4. Queuing locks (with separate spin fields).
//       5. DRDPA (Dynamically Reconfigurable Distributed Polling Area) locks
//
//   and 3 lock purposes:
//
//       1. Bootstrap locks -- Used for a few locks available at library startup-shutdown time.
//          These do not require non-negative global thread ID's.
//       2. Internal RTL locks -- Used everywhere else in the RTL
//       3. User locks (includes critical sections)
//
// ----------------------------------------------------------------------------


// ============================================================================
// Lock implementations.
// ============================================================================


// ----------------------------------------------------------------------------
// Test and set locks.
//
// Non-nested test and set locks differ from the other lock kinds (except
// futex) in that we use the memory allocated by the compiler for the lock,
// rather than a pointer to it.
//
// On lin32, lin_32e, and win_32, the space allocated may be as small as 4
// bytes, so we have to use a lock table for nested locks, and avoid accessing
// the depth_locked field for non-nested locks.
//
// Information normally available to the tools, such as lock location,
// lock usage (normal lock vs. critical section), etc. is not available with
// test and set locks.
// ----------------------------------------------------------------------------

//
// State of a test-and-set lock; 'poll' is the word targeted by the
// compare-and-swap and doubles as the owner record.
//
struct kmp_base_tas_lock {
    volatile kmp_int32 poll;         // 0 => unlocked
                                     // locked: (gtid+1) of owning thread
    kmp_int32          depth_locked; // depth locked, for nested locks only
                                     // (-1 marks a simple, non-nested lock;
                                     // not accessed at all for non-nested
                                     // locks -- see note above)
};

typedef struct kmp_base_tas_lock kmp_base_tas_lock_t;

// TAS lock storage: the lock state overlaid with the free-list record.
union kmp_tas_lock {
    kmp_base_tas_lock_t lk;
    kmp_lock_pool_t pool;   // make certain struct is large enough
    double lk_align;        // use worst case alignment
                            // no cache line padding
};

typedef union kmp_tas_lock kmp_tas_lock_t;

//
// Static initializer for test and set lock variables. Usage:
//    kmp_tas_lock_t xlock = KMP_TAS_LOCK_INITIALIZER( xlock );
//
#define KMP_TAS_LOCK_INITIALIZER( lock ) { { 0, 0 } }

extern int __kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
extern int __kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
extern int __kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
extern void __kmp_init_tas_lock( kmp_tas_lock_t *lck );
extern void __kmp_destroy_tas_lock( kmp_tas_lock_t *lck );

extern int __kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
extern int __kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
extern int __kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
extern void __kmp_init_nested_tas_lock( kmp_tas_lock_t *lck );
extern void __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck );

#define KMP_LOCK_RELEASED       1
#define KMP_LOCK_STILL_HELD     0
#define KMP_LOCK_ACQUIRED_FIRST 1
#define KMP_LOCK_ACQUIRED_NEXT  0

#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)

// ----------------------------------------------------------------------------
// futex locks.  futex locks are only available on Linux* OS.
//
// Like non-nested test and set lock, non-nested futex locks use the memory
// allocated by the compiler for the lock, rather than a pointer to it.
//
// Information normally available to the tools, such as lock location,
// lock usage (normal lock vs. critical section), etc. is not available with
// test and set locks.  With non-nested futex locks, the lock owner is not
// even available.
// ----------------------------------------------------------------------------

struct kmp_base_futex_lock {
    volatile kmp_int32 poll;         // 0 => unlocked
                                     // locked: 2*(gtid+1) of owning thread
                                     // NOTE(review): a stale line here also said
                                     // "(gtid+1)"; the 2*(gtid+1) encoding is kept --
                                     // confirm against the futex lock implementation
    kmp_int32          depth_locked; // depth locked, for nested locks only
};

typedef struct kmp_base_futex_lock kmp_base_futex_lock_t;

// Futex lock storage: the lock state overlaid with the free-list record.
union kmp_futex_lock {
    kmp_base_futex_lock_t lk;
    kmp_lock_pool_t pool;   // make certain struct is large enough
    double lk_align;        // use worst case alignment
                            // no cache line padding
};

typedef union kmp_futex_lock kmp_futex_lock_t;

//
// Static initializer for futex lock variables. Usage:
//    kmp_futex_lock_t xlock = KMP_FUTEX_LOCK_INITIALIZER( xlock );
//
#define KMP_FUTEX_LOCK_INITIALIZER( lock ) { { 0, 0 } }

extern int __kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
extern int __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
extern int __kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
extern void __kmp_init_futex_lock( kmp_futex_lock_t *lck );
extern void __kmp_destroy_futex_lock( kmp_futex_lock_t *lck );

extern int __kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
extern int __kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
extern int __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
extern void __kmp_init_nested_futex_lock( kmp_futex_lock_t *lck );
extern void __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck );

#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)


// ----------------------------------------------------------------------------
// Ticket locks.
// ----------------------------------------------------------------------------

//
// NOTE: field order matters -- KMP_TICKET_LOCK_INITIALIZER supplies values
// positionally (initialized, location, next_ticket, now_serving, owner_id,
// depth_locked), leaving 'flags' zero-initialized by omission.
//
struct kmp_base_ticket_lock {
    // `initialized' must be the first entry in the lock data structure!
    volatile union kmp_ticket_lock * initialized;  // points to the lock union if in initialized state
    ident_t const *     location;     // Source code location of omp_init_lock().
    volatile kmp_uint32 next_ticket;  // ticket number to give to next thread which acquires
    volatile kmp_uint32 now_serving;  // ticket number for thread which holds the lock
    volatile kmp_int32  owner_id;     // (gtid+1) of owning thread, 0 if unlocked
    kmp_int32           depth_locked; // depth locked, for nested locks only
    kmp_lock_flags_t    flags;        // lock specifics, e.g. critical section lock
};

typedef struct kmp_base_ticket_lock kmp_base_ticket_lock_t;

// Ticket lock storage, padded out to a cache-line multiple via KMP_PAD.
union KMP_ALIGN_CACHE kmp_ticket_lock {
    kmp_base_ticket_lock_t lk;       // This field must be first to allow static initializing.
    kmp_lock_pool_t pool;
    double                 lk_align; // use worst case alignment
    char                   lk_pad[ KMP_PAD( kmp_base_ticket_lock_t, CACHE_LINE ) ];
};

typedef union kmp_ticket_lock kmp_ticket_lock_t;

//
// Static initializer for simple ticket lock variables. Usage:
//    kmp_ticket_lock_t xlock = KMP_TICKET_LOCK_INITIALIZER( xlock );
// Note the macro argument. It is important to make var properly initialized.
//
#define KMP_TICKET_LOCK_INITIALIZER( lock ) { { (kmp_ticket_lock_t *) & (lock), NULL, 0, 0, 0, -1 } }

extern int __kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
extern int __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
extern int __kmp_test_ticket_lock_with_cheks( kmp_ticket_lock_t *lck, kmp_int32 gtid );
extern int __kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
extern void __kmp_init_ticket_lock( kmp_ticket_lock_t *lck );
extern void __kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck );

extern int __kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
extern int __kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
extern int __kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
extern void __kmp_init_nested_ticket_lock( kmp_ticket_lock_t *lck );
extern void __kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck );


// ----------------------------------------------------------------------------
// Queuing locks.
// ----------------------------------------------------------------------------

#if KMP_USE_ADAPTIVE_LOCKS

struct kmp_adaptive_lock_info;

typedef struct kmp_adaptive_lock_info kmp_adaptive_lock_info_t;

#if KMP_DEBUG_ADAPTIVE_LOCKS

// Per-lock counters for the speculative (adaptive) lock path.
// NOTE(review): the hard/soft failure classification and the exact meaning of
// 'lemmingYields' are defined by the implementation file -- confirm there.
struct kmp_adaptive_lock_statistics {
    /* So we can get stats from locks that haven't been destroyed. */
    kmp_adaptive_lock_info_t * next;   // links in the global list of live lock infos
    kmp_adaptive_lock_info_t * prev;

    /* Other statistics */
    kmp_uint32 successfulSpeculations;
    kmp_uint32 hardFailedSpeculations;
    kmp_uint32 softFailedSpeculations;
    kmp_uint32 nonSpeculativeAcquires;        // acquires that took the real lock
    kmp_uint32 nonSpeculativeAcquireAttempts; // attempts on the real lock
    kmp_uint32 lemmingYields;
};

typedef struct kmp_adaptive_lock_statistics kmp_adaptive_lock_statistics_t;

// Speculation-statistics hooks.  Declared with (void): in C, empty
// parentheses declare a function with unspecified parameters and disable
// argument type checking (in C++ the two forms are equivalent).
extern void __kmp_print_speculative_stats( void );
extern void __kmp_init_speculative_stats( void );

#endif // KMP_DEBUG_ADAPTIVE_LOCKS

// Speculation-control state attached to an adaptive lock.
struct kmp_adaptive_lock_info
{
    /* Values used for adaptivity.
     * Although these are accessed from multiple threads we don't access them atomically,
     * because if we miss updates it probably doesn't matter much. (It just affects our
     * decision about whether to try speculation on the lock).
     */
    kmp_uint32 volatile badness;          // current failure score
    kmp_uint32 volatile acquire_attempts; // running count of acquire attempts
    /* Parameters of the lock. */
    // NOTE(review): names suggest 'max_badness' is the threshold beyond which
    // speculation is skipped and 'max_soft_retries' the retry budget for soft
    // speculation failures -- confirm against the implementation.
    kmp_uint32 max_badness;
    kmp_uint32 max_soft_retries;

#if KMP_DEBUG_ADAPTIVE_LOCKS
    kmp_adaptive_lock_statistics_t volatile stats;
#endif
};

#endif // KMP_USE_ADAPTIVE_LOCKS


//
// Queuing lock: waiting threads form an explicit queue identified by the
// (head_id, tail_id) pair, which is updated together with an 8-byte CAS --
// hence the alignment requirement and the no-padding constraint below.
//
struct kmp_base_queuing_lock {

    //  `initialized' must be the first entry in the lock data structure!
    volatile union kmp_queuing_lock *initialized; // Points to the lock union if in initialized state.

    ident_t const *     location;     // Source code location of omp_init_lock().

    KMP_ALIGN( 8 )                    // tail_id  must be 8-byte aligned!

    volatile kmp_int32  tail_id;      // (gtid+1) of thread at tail of wait queue, 0 if empty
                                      // Must be no padding here since head/tail used in 8-byte CAS
    volatile kmp_int32  head_id;      // (gtid+1) of thread at head of wait queue, 0 if empty
                                      // Decl order assumes little endian
    // bakery-style lock
    volatile kmp_uint32 next_ticket;  // ticket number to give to next thread which acquires
    volatile kmp_uint32 now_serving;  // ticket number for thread which holds the lock
    volatile kmp_int32  owner_id;     // (gtid+1) of owning thread, 0 if unlocked
    kmp_int32           depth_locked; // depth locked, for nested locks only

    kmp_lock_flags_t    flags;        // lock specifics, e.g. critical section lock
};

typedef struct kmp_base_queuing_lock kmp_base_queuing_lock_t;

KMP_BUILD_ASSERT( offsetof( kmp_base_queuing_lock_t, tail_id ) % 8 == 0 );

// Queuing lock storage, padded out to a cache-line multiple via KMP_PAD.
union KMP_ALIGN_CACHE kmp_queuing_lock {
    kmp_base_queuing_lock_t lk;       // This field must be first to allow static initializing.
    kmp_lock_pool_t pool;
    double                   lk_align; // use worst case alignment
    char                     lk_pad[ KMP_PAD( kmp_base_queuing_lock_t, CACHE_LINE ) ];
};

typedef union kmp_queuing_lock kmp_queuing_lock_t;

extern int __kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
extern int __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
extern int __kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
extern void __kmp_init_queuing_lock( kmp_queuing_lock_t *lck );
extern void __kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck );

extern int __kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
extern int __kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
extern int __kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
extern void __kmp_init_nested_queuing_lock( kmp_queuing_lock_t *lck );
extern void __kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck );

#if KMP_USE_ADAPTIVE_LOCKS

// ----------------------------------------------------------------------------
// Adaptive locks.
// ----------------------------------------------------------------------------
struct kmp_base_adaptive_lock {
    kmp_base_queuing_lock qlk;
    KMP_ALIGN(CACHE_LINE)
    kmp_adaptive_lock_info_t adaptive;     // Information for the speculative adaptive lock
};

typedef struct kmp_base_adaptive_lock kmp_base_adaptive_lock_t;

// Adaptive lock storage, padded out to a cache-line multiple via KMP_PAD.
union KMP_ALIGN_CACHE kmp_adaptive_lock {
    kmp_base_adaptive_lock_t lk;
    kmp_lock_pool_t pool;
    double lk_align;
    char lk_pad[ KMP_PAD(kmp_base_adaptive_lock_t, CACHE_LINE) ];
};
typedef union kmp_adaptive_lock kmp_adaptive_lock_t;

# define GET_QLK_PTR(l) ((kmp_queuing_lock_t *) & (l)->lk.qlk)

#endif // KMP_USE_ADAPTIVE_LOCKS

// ----------------------------------------------------------------------------
// DRDPA ticket locks.
// ----------------------------------------------------------------------------

struct kmp_base_drdpa_lock {
    //
    // All of the fields on the first cache line are only written when
    // initializing or reconfiguring the lock.  These are relatively rare
    // operations, so data from the first cache line will usually stay
    // resident in the cache of each thread trying to acquire the lock.
    //
    // initialized must be the first entry in the lock data structure!
    //
    KMP_ALIGN_CACHE

    volatile union kmp_drdpa_lock * initialized;    // points to the lock union if in initialized state
    ident_t const *                 location;       // Source code location of omp_init_lock().
    volatile struct kmp_lock_poll {                 // each waiter spins on its own poll word
        kmp_uint64 poll;
    } * volatile polls;
    volatile kmp_uint64             mask;           // is 2**num_polls-1 for mod op
    kmp_uint64                      cleanup_ticket; // thread with cleanup ticket
    volatile struct kmp_lock_poll * old_polls;      // will deallocate old_polls
    kmp_uint32                      num_polls;      // must be power of 2

    //
    // next_ticket needs to exist in a separate cache line, as it is
    // invalidated every time a thread takes a new ticket.
    //
    KMP_ALIGN_CACHE

    volatile kmp_uint64             next_ticket;

    //
    // now_serving is used to store our ticket value while we hold the lock.
    // It has a slightly different meaning in the DRDPA ticket locks (where
    // it is written by the acquiring thread) than it does in the simple
    // ticket locks (where it is written by the releasing thread).
    //
    // Since now_serving is only read and written in the critical section,
    // it is non-volatile, but it needs to exist on a separate cache line,
    // as it is invalidated at every lock acquire.
    //
    // Likewise, the vars used for nested locks (owner_id and depth_locked)
    // are only written by the thread owning the lock, so they are put in
    // this cache line.  owner_id is read by other threads, so it must be
    // declared volatile.
    //
    KMP_ALIGN_CACHE

    kmp_uint64                      now_serving;    // doesn't have to be volatile
    volatile kmp_uint32             owner_id;       // (gtid+1) of owning thread, 0 if unlocked
    kmp_int32                       depth_locked;   // depth locked
    kmp_lock_flags_t                flags;          // lock specifics, e.g. critical section lock
};

typedef struct kmp_base_drdpa_lock kmp_base_drdpa_lock_t;

// DRDPA lock storage, padded out to a cache-line multiple via KMP_PAD.
union KMP_ALIGN_CACHE kmp_drdpa_lock {
    kmp_base_drdpa_lock_t lk;       // This field must be first to allow static initializing.
    kmp_lock_pool_t pool;
    double                lk_align; // use worst case alignment
    char                  lk_pad[ KMP_PAD( kmp_base_drdpa_lock_t, CACHE_LINE ) ];
};

typedef union kmp_drdpa_lock kmp_drdpa_lock_t;

extern int __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
extern int __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
extern int __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
extern void __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck );
extern void __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck );

extern int __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
extern int __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
extern int __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
extern void __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t *lck );
extern void __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck );


// ============================================================================
// Lock purposes.
// ============================================================================


// ----------------------------------------------------------------------------
// Bootstrap locks.
// ----------------------------------------------------------------------------

// Bootstrap locks -- very few locks used at library initialization time.
// Bootstrap locks are currently implemented as ticket locks.
// They could also be implemented as test and set lock, but cannot be
// implemented with other lock kinds as they require gtids which are not
// available at initialization time.

typedef kmp_ticket_lock_t kmp_bootstrap_lock_t;

#define KMP_BOOTSTRAP_LOCK_INITIALIZER( lock ) KMP_TICKET_LOCK_INITIALIZER( (lock) )

// Acquire a bootstrap lock.  No valid gtid exists this early in library
// startup, so the "gtid does not exist" sentinel is passed through.
static inline int
__kmp_acquire_bootstrap_lock( kmp_bootstrap_lock_t *lck )
{
    int status = __kmp_acquire_ticket_lock( lck, KMP_GTID_DNE );
    return status;
}

// Try to acquire a bootstrap lock without blocking; nonzero on success.
static inline int
__kmp_test_bootstrap_lock( kmp_bootstrap_lock_t *lck )
{
    int acquired = __kmp_test_ticket_lock( lck, KMP_GTID_DNE );
    return acquired;
}

// Release a bootstrap lock; the ticket-lock release status is not needed.
static inline void
__kmp_release_bootstrap_lock( kmp_bootstrap_lock_t *lck )
{
    (void) __kmp_release_ticket_lock( lck, KMP_GTID_DNE );
}

// Initialize a bootstrap lock (bootstrap locks are ticket locks underneath).
static inline void __kmp_init_bootstrap_lock( kmp_bootstrap_lock_t *lck )
{
    __kmp_init_ticket_lock( lck );
}

// Destroy a bootstrap lock (delegates to the ticket-lock destructor).
static inline void __kmp_destroy_bootstrap_lock( kmp_bootstrap_lock_t *lck )
{
    __kmp_destroy_ticket_lock( lck );
}


// ----------------------------------------------------------------------------
// Internal RTL locks.
// ----------------------------------------------------------------------------

//
// Internal RTL locks are also implemented as ticket locks, for now.
//
// FIXME - We should go through and figure out which lock kind works best for
// each internal lock, and use the type declaration and function calls for
// that explicit lock kind (and get rid of this section).
//

typedef kmp_ticket_lock_t kmp_lock_t;

// Acquire an internal RTL lock on behalf of thread 'gtid'.
static inline int
__kmp_acquire_lock( kmp_lock_t *lck, kmp_int32 gtid )
{
    int status = __kmp_acquire_ticket_lock( lck, gtid );
    return status;
}

// Try to acquire an internal RTL lock without blocking; nonzero on success.
static inline int
__kmp_test_lock( kmp_lock_t *lck, kmp_int32 gtid )
{
    int acquired = __kmp_test_ticket_lock( lck, gtid );
    return acquired;
}

// Release an internal RTL lock; the ticket-lock release status is not needed.
static inline void
__kmp_release_lock( kmp_lock_t *lck, kmp_int32 gtid )
{
    (void) __kmp_release_ticket_lock( lck, gtid );
}

// Initialize an internal RTL lock (currently a ticket lock).
static inline void __kmp_init_lock( kmp_lock_t *lck )
{
    __kmp_init_ticket_lock( lck );
}

// Destroy an internal RTL lock (currently a ticket lock).
static inline void __kmp_destroy_lock( kmp_lock_t *lck )
{
    __kmp_destroy_ticket_lock( lck );
}


// ----------------------------------------------------------------------------
// User locks.
// ----------------------------------------------------------------------------

//
// Do not allocate objects of type union kmp_user_lock!!!
// This will waste space unless __kmp_user_lock_kind == lk_drdpa.
// Instead, check the value of __kmp_user_lock_kind and allocate objects of
// the type of the appropriate union member, and cast their addresses to
// kmp_user_lock_p.
//

//
// Lock implementations selectable at runtime.  Members are conditionally
// compiled, so numeric values of the later enumerators vary with the build
// configuration; only lk_default is guaranteed to be 0.
//
enum kmp_lock_kind {
    lk_default = 0,
    lk_tas,         // test-and-set locks
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    lk_futex,       // futex locks (Linux* OS only)
#endif
#if KMP_USE_DYNAMIC_LOCK && KMP_USE_TSX
    lk_hle,         // TSX-based lock kinds
    lk_rtm,
#endif
    lk_ticket,      // ticket (Lamport bakery) locks
    lk_queuing,     // queuing locks
    lk_drdpa,       // DRDPA ticket locks
#if KMP_USE_ADAPTIVE_LOCKS
    lk_adaptive     // speculative adaptive locks
#endif // KMP_USE_ADAPTIVE_LOCKS
};

typedef enum kmp_lock_kind kmp_lock_kind_t;

extern kmp_lock_kind_t __kmp_user_lock_kind;

// Polymorphic user-lock storage.  The member matching __kmp_user_lock_kind
// is the active one; 'pool' overlays the lock while it sits on the free list.
union kmp_user_lock {
    kmp_tas_lock_t     tas;
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    kmp_futex_lock_t   futex;
#endif
    kmp_ticket_lock_t  ticket;
    kmp_queuing_lock_t queuing;
    kmp_drdpa_lock_t   drdpa;
#if KMP_USE_ADAPTIVE_LOCKS
    kmp_adaptive_lock_t     adaptive;
#endif // KMP_USE_ADAPTIVE_LOCKS
    kmp_lock_pool_t    pool;
};

typedef union kmp_user_lock *kmp_user_lock_p;

#if ! KMP_USE_DYNAMIC_LOCK

extern size_t __kmp_base_user_lock_size;
extern size_t __kmp_user_lock_size;

extern kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck );

// Return the owner record of a user lock via the dispatch function pointer.
static inline kmp_int32
__kmp_get_user_lock_owner( kmp_user_lock_p lck )
{
    KMP_DEBUG_ASSERT( __kmp_get_user_lock_owner_ != NULL );
    kmp_int32 owner = __kmp_get_user_lock_owner_( lck );
    return owner;
}

extern int ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );

#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)

//
// Fast path for omp_set_lock: when the user lock kind is lk_tas, inline the
// spin-acquire (CAS of poll from 0 to gtid+1, yielding while contended);
// otherwise dispatch through the function pointer.  Wrapped in
// do { } while (0) so the multi-statement macro expands to a single
// statement and is safe inside unbraced if/else bodies.
//
#define __kmp_acquire_user_lock_with_checks(lck,gtid)                                           \
    do {                                                                                        \
    if (__kmp_user_lock_kind == lk_tas) {                                                       \
        if ( __kmp_env_consistency_check ) {                                                    \
            char const * const func = "omp_set_lock";                                           \
            if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )                               \
                && lck->tas.lk.depth_locked != -1 ) {                                           \
                KMP_FATAL( LockNestableUsedAsSimple, func );                                    \
            }                                                                                   \
            if ( ( gtid >= 0 ) && ( lck->tas.lk.poll - 1 == gtid ) ) {                          \
                KMP_FATAL( LockIsAlreadyOwned, func );                                          \
            }                                                                                   \
        }                                                                                       \
        if ( ( lck->tas.lk.poll != 0 ) ||                                                       \
          ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) )  ) {            \
            kmp_uint32 spins;                                                                   \
            KMP_FSYNC_PREPARE( lck );                                                           \
            KMP_INIT_YIELD( spins );                                                            \
            if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) {     \
                KMP_YIELD( TRUE );                                                              \
            } else {                                                                            \
                KMP_YIELD_SPIN( spins );                                                        \
            }                                                                                   \
            while ( ( lck->tas.lk.poll != 0 ) ||                                                \
              ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) )  )  {       \
                if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \
                    KMP_YIELD( TRUE );                                                          \
                } else {                                                                        \
                    KMP_YIELD_SPIN( spins );                                                    \
                }                                                                               \
            }                                                                                   \
        }                                                                                       \
        KMP_FSYNC_ACQUIRED( lck );                                                              \
    } else {                                                                                    \
        KMP_DEBUG_ASSERT( __kmp_acquire_user_lock_with_checks_ != NULL );                       \
        ( *__kmp_acquire_user_lock_with_checks_ )( lck, gtid );                                 \
    }                                                                                           \
    } while (0)

#else
// Portable path for omp_set_lock: always dispatch through the pointer.
static inline int
__kmp_acquire_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( __kmp_acquire_user_lock_with_checks_ != NULL );
    return __kmp_acquire_user_lock_with_checks_( lck, gtid );
}
#endif

extern int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );

#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)

#include "kmp_i18n.h"                       /* AC: KMP_FATAL definition */
extern int __kmp_env_consistency_check;     /* AC: copy from kmp.h here */
// omp_test_lock with consistency checks.  For TAS locks the test is inlined
// (free check plus CAS); other lock kinds dispatch through the pointer.
static inline int
__kmp_test_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
{
    if ( __kmp_user_lock_kind == lk_tas ) {
        if ( __kmp_env_consistency_check ) {
            char const * const func = "omp_test_lock";
            // A TAS lock stored directly in the compiler-allocated omp_lock_t
            // must be simple (non-nested); depth_locked == -1 marks "simple".
            if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
                && lck->tas.lk.depth_locked != -1 ) {
                KMP_FATAL( LockNestableUsedAsSimple, func );
            }
        }
        // Success iff the lock looked free and we won the CAS (0 -> gtid+1).
        return ( ( lck->tas.lk.poll == 0 ) &&
          KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) );
    } else {
        KMP_DEBUG_ASSERT( __kmp_test_user_lock_with_checks_ != NULL );
        return ( *__kmp_test_user_lock_with_checks_ )( lck, gtid );
    }
}
#else
// Portable path for omp_test_lock: always dispatch through the pointer.
static inline int
__kmp_test_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( __kmp_test_user_lock_with_checks_ != NULL );
    return __kmp_test_user_lock_with_checks_( lck, gtid );
}
#endif

extern int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );

// omp_unset_lock: dispatch through the pointer; the status is not needed.
static inline void
__kmp_release_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( __kmp_release_user_lock_with_checks_ != NULL );
    (void) __kmp_release_user_lock_with_checks_( lck, gtid );
}

extern void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck );

// omp_init_lock: dispatch through the per-kind init function pointer.
static inline void
__kmp_init_user_lock_with_checks( kmp_user_lock_p lck )
{
    KMP_DEBUG_ASSERT( __kmp_init_user_lock_with_checks_ != NULL );
    __kmp_init_user_lock_with_checks_( lck );
}

//
// We need a non-checking version of destroy lock for when the RTL is
// doing the cleanup as it can't always tell if the lock is nested or not.
//
extern void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck );

// Non-checking destroy, used for RTL-driven cleanup when the lock's
// nesting kind cannot always be determined (see comment above).
static inline void
__kmp_destroy_user_lock( kmp_user_lock_p lck )
{
    KMP_DEBUG_ASSERT( __kmp_destroy_user_lock_ != NULL );
    __kmp_destroy_user_lock_( lck );
}

extern void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck );

// omp_destroy_lock: dispatch through the per-kind destroy function pointer.
static inline void
__kmp_destroy_user_lock_with_checks( kmp_user_lock_p lck )
{
    KMP_DEBUG_ASSERT( __kmp_destroy_user_lock_with_checks_ != NULL );
    __kmp_destroy_user_lock_with_checks_( lck );
}

extern int ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );

#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)

//
// Inlined TAS fast path for omp_set_nest_lock.  Wrapped in do { } while (0)
// so the macro expands to exactly one statement (safe inside an unbraced
// if/else at the call site), and each macro argument is parenthesized to
// guard against operator-precedence surprises in the expansion.
// On return, *depth holds KMP_LOCK_ACQUIRED_FIRST or KMP_LOCK_ACQUIRED_NEXT.
//
#define __kmp_acquire_nested_user_lock_with_checks(lck,gtid,depth)                                  \
    do {                                                                                            \
        if (__kmp_user_lock_kind == lk_tas) {                                                       \
            if ( __kmp_env_consistency_check ) {                                                    \
                char const * const func = "omp_set_nest_lock";                                      \
                /* depth_locked == -1 marks a lock initialized as a simple (non-nestable) lock */   \
                if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_NEST_LOCK_T_SIZE )                          \
                    && (lck)->tas.lk.depth_locked == -1 ) {                                         \
                    KMP_FATAL( LockSimpleUsedAsNestable, func );                                    \
                }                                                                                   \
            }                                                                                       \
            if ( (lck)->tas.lk.poll - 1 == (gtid) ) {  /* recursive acquire by current owner */     \
                (lck)->tas.lk.depth_locked += 1;                                                    \
                *(depth) = KMP_LOCK_ACQUIRED_NEXT;                                                  \
            } else {                                                                                \
                if ( ( (lck)->tas.lk.poll != 0 ) ||                                                 \
                  ( ! KMP_COMPARE_AND_STORE_ACQ32( &((lck)->tas.lk.poll), 0, (gtid) + 1 ) ) ) {     \
                    kmp_uint32 spins;                                                               \
                    KMP_FSYNC_PREPARE( lck );                                                       \
                    KMP_INIT_YIELD( spins );                                                        \
                    /* Yield to the OS when oversubscribed, otherwise spin-yield */                 \
                    if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \
                        KMP_YIELD( TRUE );                                                          \
                    } else {                                                                        \
                        KMP_YIELD_SPIN( spins );                                                    \
                    }                                                                               \
                    while ( ( (lck)->tas.lk.poll != 0 ) ||                                          \
                      ( ! KMP_COMPARE_AND_STORE_ACQ32( &((lck)->tas.lk.poll), 0, (gtid) + 1 ) ) ) { \
                        if ( TCR_4(__kmp_nth) >                                                     \
                          (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) {                   \
                            KMP_YIELD( TRUE );                                                      \
                        } else {                                                                    \
                            KMP_YIELD_SPIN( spins );                                                \
                        }                                                                           \
                    }                                                                               \
                }                                                                                   \
                (lck)->tas.lk.depth_locked = 1;                                                     \
                *(depth) = KMP_LOCK_ACQUIRED_FIRST;                                                 \
            }                                                                                       \
            KMP_FSYNC_ACQUIRED( lck );                                                              \
        } else {                                                                                    \
            KMP_DEBUG_ASSERT( __kmp_acquire_nested_user_lock_with_checks_ != NULL );                \
            *(depth) = ( *__kmp_acquire_nested_user_lock_with_checks_ )( lck, gtid );               \
        }                                                                                           \
    } while (0)

#else
static inline void
__kmp_acquire_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid, int* depth )
{
    // Always dispatch through the vtbl; the acquired-depth status is stored in *depth.
    KMP_DEBUG_ASSERT( __kmp_acquire_nested_user_lock_with_checks_ != NULL );
    *depth = __kmp_acquire_nested_user_lock_with_checks_( lck, gtid );
}
#endif

extern int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );

#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
//
// Fast-path omp_test_nest_lock for TAS locks; other lock kinds dispatch
// through the function pointer bound by KMP_BIND_NESTED_USER_LOCK*.
// Returns nonzero (the new nesting depth) on success, 0 if the lock is held
// by another thread.
//
static inline int
__kmp_test_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
{
    if ( __kmp_user_lock_kind == lk_tas ) {
        int retval;
        if ( __kmp_env_consistency_check ) {
            char const * const func = "omp_test_nest_lock";
            // depth_locked == -1 marks a lock initialized as a simple
            // (non-nestable) lock; report the API misuse.
            if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_NEST_LOCK_T_SIZE )
                && lck->tas.lk.depth_locked == -1 ) {
                KMP_FATAL( LockSimpleUsedAsNestable, func );
            }
        }
        KMP_DEBUG_ASSERT( gtid >= 0 );
        if ( lck->tas.lk.poll - 1 == gtid ) {   /* __kmp_get_tas_lock_owner( lck ) == gtid */
            return ++lck->tas.lk.depth_locked;  /* same owner, depth increased */
        }
        // Not the owner: single CAS attempt.  poll == 0 means unlocked;
        // storing gtid + 1 claims ownership.
        retval = ( ( lck->tas.lk.poll == 0 ) &&
          KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) );
        if ( retval ) {
            KMP_MB();   // full fence before publishing the initial depth
            lck->tas.lk.depth_locked = 1;
        }
        return retval;
    } else {
        KMP_DEBUG_ASSERT( __kmp_test_nested_user_lock_with_checks_ != NULL );
        return ( *__kmp_test_nested_user_lock_with_checks_ )( lck, gtid );
    }
}
#else
static inline int
__kmp_test_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
{
    // No inlined TAS fast path on this platform; always dispatch through the vtbl.
    KMP_DEBUG_ASSERT( __kmp_test_nested_user_lock_with_checks_ != NULL );
    return __kmp_test_nested_user_lock_with_checks_( lck, gtid );
}
#endif

extern int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );

static inline int
__kmp_release_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
{
    // Dispatch through the vtbl and propagate the callee's int status to the caller.
    KMP_DEBUG_ASSERT( __kmp_release_nested_user_lock_with_checks_ != NULL );
    return __kmp_release_nested_user_lock_with_checks_( lck, gtid );
}

extern void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck );

static inline void
__kmp_init_nested_user_lock_with_checks( kmp_user_lock_p lck )
{
    // Forward to the nested-lock init routine bound for the active lock kind.
    KMP_DEBUG_ASSERT( __kmp_init_nested_user_lock_with_checks_ != NULL );
    __kmp_init_nested_user_lock_with_checks_( lck );
}

extern void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck );

static inline void
__kmp_destroy_nested_user_lock_with_checks( kmp_user_lock_p lck )
{
    // Forward to the nested-lock destroy routine bound for the active lock kind.
    KMP_DEBUG_ASSERT( __kmp_destroy_nested_user_lock_with_checks_ != NULL );
    __kmp_destroy_nested_user_lock_with_checks_( lck );
}

//
// user lock functions which do not necessarily exist for all lock kinds.
//
// The "set" functions usually have wrapper routines that check for a NULL set
// function pointer and call it if non-NULL.
//
// In some cases, it makes sense to have a "get" wrapper function check for a
// NULL get function pointer and return NULL / invalid value / error code if
// the function pointer is NULL.
//
// In other cases, the calling code really should differentiate between an
// unimplemented function and one that is implemented but returning NULL /
// invalid value.  If this is the case, no get function wrapper exists.
//

extern int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck );

// no set function; fields set during local allocation

extern const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck );

static inline const ident_t *
__kmp_get_user_lock_location( kmp_user_lock_p lck )
{
    // Optional vtbl entry: return NULL when the lock kind does not implement it.
    return ( __kmp_get_user_lock_location_ != NULL )
        ? ( *__kmp_get_user_lock_location_ )( lck )
        : NULL;
}

extern void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc );

static inline void
__kmp_set_user_lock_location( kmp_user_lock_p lck, const ident_t *loc )
{
    // Optional vtbl entry: silently skip if unimplemented for this lock kind.
    if ( __kmp_set_user_lock_location_ == NULL ) {
        return;
    }
    __kmp_set_user_lock_location_( lck, loc );
}

extern kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck );

extern void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags );

static inline void
__kmp_set_user_lock_flags( kmp_user_lock_p lck, kmp_lock_flags_t flags )
{
    // Optional vtbl entry: silently skip if unimplemented for this lock kind.
    if ( __kmp_set_user_lock_flags_ == NULL ) {
        return;
    }
    __kmp_set_user_lock_flags_( lck, flags );
}

//
// The function which sets up all of the vtbl pointers for kmp_user_lock_t.
//
extern void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind );

//
// Macros for binding user lock functions.
//
//
// Binds the acquire/release/test/init/destroy vtbl entries to the routines
// for the given lock kind.  "nest" selects the plain ("_") vs. nested
// ("_nested_") entry points and "suffix" selects the checking vs.
// non-checking variant; the casts coerce each specific lock routine to the
// generic kmp_user_lock_p signature.
//
#define KMP_BIND_USER_LOCK_TEMPLATE(nest, kind, suffix) {                                       \
    __kmp_acquire##nest##user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) )     \
                                                  __kmp_acquire##nest##kind##_##suffix;         \
    __kmp_release##nest##user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) )     \
                                                  __kmp_release##nest##kind##_##suffix;         \
    __kmp_test##nest##user_lock_with_checks_    = ( int (*)( kmp_user_lock_p, kmp_int32 ) )     \
                                                  __kmp_test##nest##kind##_##suffix;            \
    __kmp_init##nest##user_lock_with_checks_    = ( void (*)( kmp_user_lock_p ) )               \
                                                  __kmp_init##nest##kind##_##suffix;            \
    __kmp_destroy##nest##user_lock_with_checks_ = ( void (*)( kmp_user_lock_p ) )               \
                                                  __kmp_destroy##nest##kind##_##suffix;         \
}

#define KMP_BIND_USER_LOCK(kind)                    KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock)
#define KMP_BIND_USER_LOCK_WITH_CHECKS(kind)        KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock_with_checks)
#define KMP_BIND_NESTED_USER_LOCK(kind)             KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock)
#define KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(kind) KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock_with_checks)

// ----------------------------------------------------------------------------
// User lock table & lock allocation
// ----------------------------------------------------------------------------

/*
    On 64-bit Linux* OS (and OS X*) GNU compiler allocates only 4 bytes of memory for a lock variable, which
    is not enough to store a pointer, so we have to use lock indexes instead of pointers and
    maintain lock table to map indexes to pointers.


    Note: The first element of the table is not a pointer to lock! It is a pointer to previously
    allocated table (or NULL if it is the first table).

    Usage:

        if ( OMP_LOCK_T_SIZE < sizeof( <lock> ) ) { // or OMP_NEST_LOCK_T_SIZE
            Lock table is fully utilized. User locks are indexes, so table is
            used on user lock operation.
            Note: it may be the case (lin_32) that we don't need to use a lock
            table for regular locks, but do need the table for nested locks.
        }
        else {
            Lock table initialized but not actually used.
        }
*/

// Growable table mapping lock indexes to lock pointers; used when the
// compiler-allocated omp_lock_t is too small to hold a pointer (see the
// usage note above).
struct kmp_lock_table {
    kmp_lock_index_t  used;      // Number of used elements
    kmp_lock_index_t  allocated; // Number of allocated elements
    kmp_user_lock_p * table;     // Lock table.  Element 0 is not a lock: it points
                                 // to the previously allocated table (or NULL).
};

typedef struct kmp_lock_table kmp_lock_table_t;

extern kmp_lock_table_t __kmp_user_lock_table;
extern kmp_user_lock_p __kmp_lock_pool;

// Singly-linked list node tracking a block of locks that were allocated
// together (block size is __kmp_num_locks_in_block).
struct kmp_block_of_locks {
    struct kmp_block_of_locks * next_block;  // next allocated block, or NULL
    void *                      locks;       // storage holding this block's locks
};

typedef struct kmp_block_of_locks kmp_block_of_locks_t;

extern kmp_block_of_locks_t *__kmp_lock_blocks;
extern int __kmp_num_locks_in_block;

extern kmp_user_lock_p __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid, kmp_lock_flags_t flags );
extern void __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck );
extern kmp_user_lock_p __kmp_lookup_user_lock( void **user_lock, char const *func );
extern void __kmp_cleanup_user_locks();

// Flips __kmp_init_user_locks to TRUE exactly once.  Uses the double-checked
// pattern under __kmp_initz_lock so the common already-initialized path avoids
// taking the bootstrap lock.
#define KMP_CHECK_USER_LOCK_INIT() \
        {                                                               \
            if ( ! TCR_4( __kmp_init_user_locks ) ) {                   \
                __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );      \
                if ( ! TCR_4( __kmp_init_user_locks ) ) {               \
                    TCW_4( __kmp_init_user_locks, TRUE );               \
                }                                                       \
                __kmp_release_bootstrap_lock( &__kmp_initz_lock );      \
            }                                                           \
        }

#endif // KMP_USE_DYNAMIC_LOCK

#undef KMP_PAD
#undef KMP_GTID_DNE

#if KMP_USE_DYNAMIC_LOCK

//
// KMP_USE_DYNAMIC_LOCK enables dynamic dispatch of lock functions without breaking the current
// compatibility. Essential functionality of this new code is dynamic dispatch, but it also
// implements (or enables implementation of) hinted user lock and critical section which will be
// part of OMP 4.1 soon.
//
// Lock type can be decided at creation time (i.e., lock initialization), and subsequent lock
// function call on the created lock object requires type extraction and call through jump table
// using the extracted type. This type information is stored in two different ways depending on
// the size of the lock object, and we differentiate lock types by this size requirement - direct
// and indirect locks.
//
// Direct locks:
// A direct lock object fits into the space created by the compiler for an omp_lock_t object, and
// TAS/Futex lock falls into this category. We use the low byte of the lock object as the storage
// for the lock type, and appropriate bit operation is required to access the data meaningful to
// the lock algorithms. Also, to differentiate direct lock from indirect lock, 1 is written to LSB
// of the lock object. The newly introduced "hle" lock is also a direct lock.
//
// Indirect locks:
// An indirect lock object requires more space than the compiler-generated space, and it should be
// allocated from heap. Depending on the size of the compiler-generated space for the lock (i.e.,
// size of omp_lock_t), this omp_lock_t object stores either the address of the heap-allocated
// indirect lock (void * fits in the object) or an index to the indirect lock table entry that
// holds the address. Ticket/Queuing/DRDPA/Adaptive lock falls into this category, and the newly
// introduced "rtm" lock is also an indirect lock which was implemented on top of the Queuing lock.
// When the omp_lock_t object holds an index (not lock address), 0 is written to LSB to
// differentiate the lock from a direct lock, and the remaining part is the actual index to the
// indirect lock table.
//

#include <stdint.h> // for uintptr_t

// Shortcuts.  Each expansion is fully parenthesized so the macros compose
// safely inside larger preprocessor/boolean expressions.  The trailing
// "&& 1" / "&& 0" factors are on/off switches for the inlined fast paths.
#define KMP_USE_FUTEX          (KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64))
#define KMP_USE_INLINED_TAS    ((KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)) && 1)
#define KMP_USE_INLINED_FUTEX  (KMP_USE_FUTEX && 0)

// List of lock definitions; all nested locks are indirect locks.
// hle lock is xchg lock prefixed with XACQUIRE/XRELEASE.
// All nested locks are indirect lock types.
#if KMP_USE_TSX
# if KMP_USE_FUTEX
#  define KMP_FOREACH_D_LOCK(m, a)  m(tas, a) m(futex, a) m(hle, a)
#  define KMP_FOREACH_I_LOCK(m, a)  m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm, a) \
                                    m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a)         \
                                    m(nested_queuing, a) m(nested_drdpa, a)
# else
#  define KMP_FOREACH_D_LOCK(m, a)  m(tas, a)             m(hle, a)
#  define KMP_FOREACH_I_LOCK(m, a)  m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm, a) \
                                    m(nested_tas, a)                    m(nested_ticket, a)         \
                                    m(nested_queuing, a) m(nested_drdpa, a)
# endif // KMP_USE_FUTEX
# define KMP_LAST_D_LOCK lockseq_hle
#else
# if KMP_USE_FUTEX
#  define KMP_FOREACH_D_LOCK(m, a)  m(tas, a) m(futex, a)
#  define KMP_FOREACH_I_LOCK(m, a)  m(ticket, a) m(queuing, a)                m(drdpa, a)           \
                                    m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a)         \
                                    m(nested_queuing, a) m(nested_drdpa, a)
#  define KMP_LAST_D_LOCK lockseq_futex
# else
#  define KMP_FOREACH_D_LOCK(m, a)  m(tas, a)
#  define KMP_FOREACH_I_LOCK(m, a)  m(ticket, a) m(queuing, a)                m(drdpa, a)           \
                                    m(nested_tas, a)                    m(nested_ticket, a)         \
                                    m(nested_queuing, a) m(nested_drdpa, a)
#  define KMP_LAST_D_LOCK lockseq_tas
# endif // KMP_USE_FUTEX
#endif // KMP_USE_TSX

// Information used in dynamic dispatch
#define KMP_LOCK_SHIFT   8 // number of low bits to be used as tag for direct locks
#define KMP_FIRST_D_LOCK lockseq_tas
#define KMP_FIRST_I_LOCK lockseq_ticket
#define KMP_LAST_I_LOCK  lockseq_nested_drdpa
#define KMP_NUM_I_LOCKS  (locktag_nested_drdpa+1) // number of indirect lock types

// Base type for dynamic locks.
typedef kmp_uint32 kmp_dyna_lock_t;

// Lock sequence that enumerates all lock kinds.
// Always make this enumeration consistent with kmp_lockseq_t in the include directory.
typedef enum {
    lockseq_indirect = 0,    // 0 is reserved for generic indirect dispatch
#define expand_seq(l,a) lockseq_##l,
    KMP_FOREACH_D_LOCK(expand_seq, 0)    // direct lock sequences first...
    KMP_FOREACH_I_LOCK(expand_seq, 0)    // ...then indirect lock sequences
#undef expand_seq
} kmp_dyna_lockseq_t;

// Enumerates indirect lock tags.
// Tags are dense 0-based values used to index the indirect-lock jump tables
// (see KMP_I_LOCK_FUNC below).
typedef enum {
#define expand_tag(l,a) locktag_##l,
    KMP_FOREACH_I_LOCK(expand_tag, 0)
#undef expand_tag
} kmp_indirect_locktag_t;

// Utility macros that extract information from lock sequences.
#define KMP_IS_D_LOCK(seq) ((seq) >= KMP_FIRST_D_LOCK && (seq) <= KMP_LAST_D_LOCK)
#define KMP_IS_I_LOCK(seq) ((seq) >= KMP_FIRST_I_LOCK && (seq) <= KMP_LAST_I_LOCK)
#define KMP_GET_I_TAG(seq) (kmp_indirect_locktag_t)((seq) - KMP_FIRST_I_LOCK)
#define KMP_GET_D_TAG(seq) ((seq)<<1 | 1)

// Enumerates direct lock tags starting from indirect tag.
// Each direct tag is (lockseq << 1) | 1 (via KMP_GET_D_TAG), so the set LSB
// distinguishes a direct lock from an indirect lock index.
typedef enum {
#define expand_tag(l,a) locktag_##l = KMP_GET_D_TAG(lockseq_##l),
    KMP_FOREACH_D_LOCK(expand_tag, 0)
#undef expand_tag
} kmp_direct_locktag_t;

// Indirect lock type
// Indirect lock type
typedef struct {
    kmp_user_lock_p lock;           // heap-allocated underlying lock object
    kmp_indirect_locktag_t type;    // index into the indirect-lock jump tables
} kmp_indirect_lock_t;

// Function tables for direct locks. Set/unset/test differentiate functions with/without consistency checking.
extern void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t);
extern void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *);
extern void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32);
extern int  (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32);
extern int  (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32);

// Function tables for indirect locks. Set/unset/test differentiate functions with/without consistency checking.
extern void (*__kmp_indirect_init[])(kmp_user_lock_p);
extern void (*__kmp_indirect_destroy[])(kmp_user_lock_p);
extern void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32);
extern int  (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32);
extern int  (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32);

// Extracts direct lock tag from a user lock pointer
#define KMP_EXTRACT_D_TAG(l)   (*((kmp_dyna_lock_t *)(l)) & ((1<<KMP_LOCK_SHIFT)-1) & -(*((kmp_dyna_lock_t *)(l)) & 1))

// Extracts indirect lock index from a user lock pointer
#define KMP_EXTRACT_I_INDEX(l) (*(kmp_lock_index_t *)(l) >> 1)

// Returns function pointer to the direct lock function with l (kmp_dyna_lock_t *) and op (operation type).
#define KMP_D_LOCK_FUNC(l, op) __kmp_direct_##op[KMP_EXTRACT_D_TAG(l)]

// Returns function pointer to the indirect lock function with l (kmp_indirect_lock_t *) and op (operation type).
#define KMP_I_LOCK_FUNC(l, op) __kmp_indirect_##op[((kmp_indirect_lock_t *)(l))->type]

// Initializes a direct lock with the given lock pointer and lock sequence.
#define KMP_INIT_D_LOCK(l, seq) __kmp_direct_init[KMP_GET_D_TAG(seq)]((kmp_dyna_lock_t *)l, seq)

// Initializes an indirect lock with the given lock pointer and lock sequence.
#define KMP_INIT_I_LOCK(l, seq) __kmp_direct_init[0]((kmp_dyna_lock_t *)(l), seq)

// Returns "free" lock value for the given lock type.
#define KMP_LOCK_FREE(type)      (locktag_##type)

// Returns "busy" lock value for the given lock type.
#define KMP_LOCK_BUSY(v, type)   ((v)<<KMP_LOCK_SHIFT | locktag_##type)

// Returns lock value after removing (shifting) lock tag.
#define KMP_LOCK_STRIP(v)        ((v)>>KMP_LOCK_SHIFT)

// Initializes global states and data structures for managing dynamic user locks.
extern void __kmp_init_dynamic_user_locks();

// Allocates and returns an indirect lock with the given indirect lock tag.
extern kmp_indirect_lock_t * __kmp_allocate_indirect_lock(void **, kmp_int32, kmp_indirect_locktag_t);

// Cleans up global states and data structures for managing dynamic user locks.
extern void __kmp_cleanup_indirect_user_locks();

// Default user lock sequence when not using hinted locks. 
extern kmp_dyna_lockseq_t __kmp_user_lock_seq;

// Jump table for "set lock location", available only for indirect locks.
extern void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *);
#define KMP_SET_I_LOCK_LOCATION(lck, loc) {                         \
    if (__kmp_indirect_set_location[(lck)->type] != NULL)           \
        __kmp_indirect_set_location[(lck)->type]((lck)->lock, loc); \
}

// Jump table for "set lock flags", available only for indirect locks.
extern void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t);
#define KMP_SET_I_LOCK_FLAGS(lck, flag) {                         \
    if (__kmp_indirect_set_flags[(lck)->type] != NULL)            \
        __kmp_indirect_set_flags[(lck)->type]((lck)->lock, flag); \
}

// Jump table for "get lock location", available only for indirect locks.
extern const ident_t * (*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p);
#define KMP_GET_I_LOCK_LOCATION(lck) ( __kmp_indirect_get_location[(lck)->type] != NULL       \
                                      ? __kmp_indirect_get_location[(lck)->type]((lck)->lock) \
                                      : NULL )

// Jump table for "get lock flags", available only for indirect locks.
extern kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p);
#define KMP_GET_I_LOCK_FLAGS(lck) ( __kmp_indirect_get_flags[(lck)->type] != NULL       \
                                   ? __kmp_indirect_get_flags[(lck)->type]((lck)->lock) \
                                   : NULL )

#define KMP_I_LOCK_CHUNK 1024       // number of kmp_indirect_lock_t objects to be allocated together

// Lock table for indirect locks.
typedef struct kmp_indirect_lock_table {
    kmp_indirect_lock_t **table;    // blocks of indirect locks allocated,
                                    // KMP_I_LOCK_CHUNK locks per block (see KMP_GET_I_LOCK)
    kmp_lock_index_t size;          // size of the indirect lock table
    kmp_lock_index_t next;          // index to the next lock to be allocated
} kmp_indirect_lock_table_t;

extern kmp_indirect_lock_table_t __kmp_i_lock_table;

// Returns the indirect lock associated with the given index.
#define KMP_GET_I_LOCK(index) (*(__kmp_i_lock_table.table + (index)/KMP_I_LOCK_CHUNK) + (index)%KMP_I_LOCK_CHUNK)

// Number of locks in a lock block, which is fixed to "1" now.
// TODO: No lock block implementation now. If we do support, we need to manage lock block data
// structure for each indirect lock type.
extern int __kmp_num_locks_in_block;

// Fast lock table lookup without consistency checking
#define KMP_LOOKUP_I_LOCK(l) ( (OMP_LOCK_T_SIZE < sizeof(void *))       \
                               ? KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(l)) \
                               : *((kmp_indirect_lock_t **)(l)) )

// Used once in kmp_error.c
extern kmp_int32
__kmp_get_user_lock_owner(kmp_user_lock_p, kmp_uint32);

#else // KMP_USE_DYNAMIC_LOCK

# define KMP_LOCK_BUSY(v, type)    (v)
# define KMP_LOCK_FREE(type)       0
# define KMP_LOCK_STRIP(v)         (v)

#endif // KMP_USE_DYNAMIC_LOCK

#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus

#endif /* KMP_LOCK_H */

